
**merge interval data with web data

use Data/daily_merged.dta, clear

xtset account_number read_date

gen date = mofd(read_date)
gen ldailykWh =ln(dailykWh)

rename opened_date opened_date_original
merge m:1 account_number date using Data/webuse_050913.dta
drop if _merge==2


drop _merge id alt_id 

**fill in missings. 
foreach var in NEmailOpen NEmailSent NClicked Totalloggedin Totalvisits  {
	replace `var' =0 if `var'==.
	}


foreach var in NEmailOpen NEmailSent NClicked {
	rename `var'  `var'_monthly
	}

		
* fill in missing date variables
foreach var in received_date opened_date clicked_date activated_date loggedin_date visited_date accessed_date Totalloggedin Totalvisits {
by account_number, sort: egen t = min(`var')
replace `var'= t 
drop t
}

gen lkWh_using= ldailykWh
gen date_using=read_date

replace opened_date = min(opened_date, opened_date_original)
replace received_date = min(received_date, opened_date_original)
replace accessed_date = max(received_date, accessed_date) if received_date~=. & accessed_date~=.

merge m:1 account_number using Data/survey, keepusing(relative_usage relative_change_in_usage relative_peak_usage answeredsomequestions home_num_people home_num_bedrooms)

drop if _merge==2

drop _merge


* recode the relative use variable into quantiles
gen quantiles_prior = 1 if relative_usage==5
replace quantiles_prior=2 if relative_usage==3
replace quantiles_prior=3 if relative_usage==2
replace quantiles_prior=4 if relative_usage==1
replace quantiles_prior=5 if relative_usage==4

label define quantiles 1 "1-20%" 2 "20-40%" 3 "40-60%" 4 "60-80%" 5 "80-100%"
label values quantiles_prior quantiles

label define peak_usage 1 "0-15%" 2 "15-30%" 3 "30-50%" 4 "50-70%" 5 "70-85%" 6 "85%-100%"
label values relative_peak_usage peak_usage

gen relative_change_12months = 1 if relative_change_in_usage == 1
replace relative_change_12months = 2 if relative_change_in_usage == 4
replace relative_change_12months = 3 if relative_change_in_usage == 3
replace relative_change_12months = 4 if relative_change_in_usage == 5
replace relative_change_12months = 5 if relative_change_in_usage == 2 

label define relative_change 1 "Reduced by 20%" 2 "Reduced by 5-15%" 3 "No change" 4 "Increased by 5-15%" 5 "Increased by 20%"



* GLOBAL sample restrictions
drop if solar ==2
drop if plantype=="Business"
drop if product_code=="BUSINESS"
drop if product_code=="BUSINESS50RB"
drop if product_code=="BUSINESS100RB"


* fix up first and last observed dates
rename firstreading firstreading_raw
gen firstreading_date=date(firstreading_raw,"DM20Y")

by account_number, sort: egen firstobserved_date =min(read_date) 
by account_number, sort: egen lastobserved_date =max(read_date) 


keep account_number ever_viewedconserve nmi read_date dailykWh registers controlledload large_use1 large_use2 large_user1 large_user2 control2 contract_start_date_dmy frmp_date postcode openedcampaignemail receivedcampaignemail clickedcampaignemail firstopenedcampaignemail firstobserved_date lastobserved_date ldailykWh NEmailOpen NEmailSent NClicked Totalloggedin loggedin_date Totalvisits visited_date received_date opened_date clicked_date activated_date accessed_date relative_peak relative_change_12months answeredsomequestions home_num_people home_num_bedrooms quantiles_prior solar

save Data/daily_survey_web.dta, replace

